#Load in the Packages
library(USAboundaries)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(downloader)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the data: download the permits CSV to a temporary file, then parse it
# with readr.
permits_url <- "https://raw.githubusercontent.com/WJC-Data-Science/DTS350/master/permits.csv"
csv_temp <- tempfile()
# mode = "wb" writes the download in binary mode.
download(url = permits_url, destfile = csv_temp, mode = "wb")
permit_data <- read_csv(file = csv_temp)
## Warning: Missing column names filled in: 'X1' [1]
##
## -- Column specification --------------------------------------------------------
## cols(
## X1 = col_double(),
## state = col_double(),
## StateAbbr = col_character(),
## county = col_double(),
## countyname = col_character(),
## variable = col_character(),
## year = col_double(),
## value = col_double()
## )
# Look at the data and how it was imported: print the first rows of the tibble.
# NOTE(review): X1 looks like a row index carried over from the CSV (its header
# was blank, so readr named it) -- confirm before relying on it.
head(permit_data)
## # A tibble: 6 x 8
## X1 state StateAbbr county countyname variable year value
## <dbl> <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 1 1 AL 1 Autauga County All Permits 2010 191
## 2 2 1 AL 1 Autauga County All Permits 2009 110
## 3 3 1 AL 1 Autauga County All Permits 2008 173
## 4 4 1 AL 1 Autauga County All Permits 2007 260
## 5 5 1 AL 1 Autauga County All Permits 2006 347
## 6 6 1 AL 1 Autauga County All Permits 2005 313
# Show the structure of permit_data: dimensions, column types, and the readr
# column specification stored as an attribute.
str(permit_data)
## spec_tbl_df [327,422 x 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ X1 : num [1:327422] 1 2 3 4 5 6 7 8 9 10 ...
## $ state : num [1:327422] 1 1 1 1 1 1 1 1 1 1 ...
## $ StateAbbr : chr [1:327422] "AL" "AL" "AL" "AL" ...
## $ county : num [1:327422] 1 1 1 1 1 1 1 1 1 1 ...
## $ countyname: chr [1:327422] "Autauga County" "Autauga County" "Autauga County" "Autauga County" ...
## $ variable : chr [1:327422] "All Permits" "All Permits" "All Permits" "All Permits" ...
## $ year : num [1:327422] 2010 2009 2008 2007 2006 ...
## $ value : num [1:327422] 191 110 173 260 347 313 367 283 276 400 ...
## - attr(*, "spec")=
## .. cols(
## .. X1 = col_double(),
## .. state = col_double(),
## .. StateAbbr = col_character(),
## .. county = col_double(),
## .. countyname = col_character(),
## .. variable = col_character(),
## .. year = col_double(),
## .. value = col_double()
## .. )
# Print the last rows of permit_data.
tail(permit_data)
## # A tibble: 6 x 8
## X1 state StateAbbr county countyname variable year value
## <dbl> <dbl> <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 327417 56 WY 45 Weston County 2-Unit Multifamily 1980 4
## 2 327418 56 WY 45 Weston County 3 & 4-Unit Multifamily 2004 4
## 3 327419 56 WY 45 Weston County 3 & 4-Unit Multifamily 1982 8
## 4 327420 56 WY 45 Weston County 3 & 4-Unit Multifamily 1981 20
## 5 327421 56 WY 45 Weston County 5+-Unit Multifamily 2000 10
## 6 327422 56 WY 45 Weston County 5+-Unit Multifamily 1981 8
# Merge the two data frames and total the permit values per state and year.
#
# `state_codes` is not defined in this script (presumably the lookup table
# exported by USAboundaries -- confirm). Its `state_code` column is converted
# with as.integer() into a column named `state` so that it can be matched
# against the numeric `state` column of `permit_data`.
# NOTE(review): despite its name, `zip_codes` holds state codes, not ZIP codes;
# the name is kept so any later code that refers to it still works.
zip_codes <- state_codes %>%
  mutate(state = as.integer(state_code))
# NOTE(review): every `variable` category is summed together here. If
# "All Permits" is already the total of the other categories, this counts
# permits more than once -- confirm, and if so add
# filter(variable == "All Permits") before grouping.
#
# `.groups = "drop"` returns an ungrouped tibble, so later verbs applied to
# `state_permits` do not silently operate per state.
state_permits <- merge(permit_data, zip_codes, by = "state") %>%
  group_by(state_name, year) %>%
  summarise(across(value, sum), .groups = "drop")
head(state_permits)
## # A tibble: 6 x 3
## state_name year value
## <chr> <dbl> <dbl>
## 1 Alabama 1980 38954
## 2 Alabama 1981 24592
## 3 Alabama 1982 21454
## 4 Alabama 1983 43537
## 5 Alabama 1984 37676
## 6 Alabama 1985 41725
# First plot: the overall permit totals by state, one colored line per state.
#
# `value` is divided by 1000 before plotting, so the y axis is labelled in
# thousands of permits. The dotted vertical line marks 2008. The legend is
# hidden; the state name is attached to each point through the `text`
# aesthetic instead. `text` is not a ggplot2 aesthetic, which is why ggplot2
# emits the "Ignoring unknown aesthetics" warning recorded below; it is kept on
# purpose so the interactive plotly version can show it in the hover tooltip.
state_permit_plot <- ggplot(data = state_permits, aes(x = year, y = value/1000, color = state_name)) +
  geom_point(aes(text = paste("State:", state_name))) +
  geom_line() +
  geom_vline(xintercept = 2008, linetype = 'dotted') +
  labs(x = 'Time in (Years)', y = 'Number of Permits (Thousands)', title = 'Permits by State') +
  theme_bw() +
  theme(legend.position = 'none')
## Warning: Ignoring unknown aesthetics: text
# Convert the static ggplot into an interactive plotly widget.
ggplotly(state_permit_plot)
# I wish there were a better way to graph the data without all the lines lying on top of each other.
# I understand that once the plot is interactive we can see which state each data point comes from.
# We can see that California had the most permits in 1986. What I find interesting are the top three states.
# Looking at the top three, we can see that Florida, Texas, and California had growing populations,
# so we should look at how they compare individually over time.
# Top three states: keep only Florida, California and Texas and plot their
# yearly permit totals on one chart.
Top_three <- filter(state_permits, state_name %in% c('Florida', 'California', 'Texas'))
# `value` is divided by 1000, so the y axis is labelled in thousands of permits.
# The dotted vertical line marks 2008.
Plot_top_three <- ggplot(data = Top_three, aes(x = year, y = value/1000, color = state_name)) +
  geom_point() +
  geom_line() +
  geom_vline(xintercept = 2008, linetype = 'dotted') +
  labs(x = 'Time', y = 'Number of Permits (Thousands)', title = 'Top 3 Permit States') +
  theme_bw()
# Convert the static ggplot into an interactive plotly widget.
ggplotly(Plot_top_three)
# After adding the 2008 market crash to the plot, we can see that these three big states
# took a big hit. They all continued to drop after the crash, even though these three states have
# the highest populations in the United States.